library(ggplot2)
library(dplyr)
library(readr)
library(treemap)
library(ggrepel)
library(plotly)
library(knitr)
library(kableExtra)
Business Question 1
What is the average rating and total number of reviews for each
product?
Business_Question_1 <- read_csv("Business Question 1.csv")
# Select top 15 products
top_15_products <- Business_Question_1 %>%
mutate(score = (avg_rating * 0.6) + (total_reviews / max(total_reviews) * 0.4) * 5) %>%
arrange(desc(score)) %>%
slice_head(n = 15)
# Scatter plot with top 15 products
Business_Question_1_Plot <- ggplot(top_15_products, aes(x = total_reviews, y = avg_rating)) +
geom_point(color = "blue", size = 4) +
geom_text_repel(
aes(label = product_name),
size = 2.5,
max.overlaps = Inf,
box.padding = 0.5,
point.padding = 0.5
) +
labs(
title = "Product Reviews vs Ratings (Top 15)",
x = "Number of Reviews",
y = "Rating"
) +
theme_minimal() +
theme(
plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
plot.margin = margin(1, 1, 1, 1, "cm")
)
(Business_Question_1_Plot)

Business_Question_1_Table <- kable(top_15_products, caption = "Top 15 Products Based on Rating and Review Score")
Business_Question_1_Table
Top 15 Products Based on Rating and Review Score
| bareMinerals |
Original Loose Powder Mineral Foundation Broad Spectrum
SPF 15 |
4.5 |
19000 |
4.700000 |
| Anastasia Beverly Hills |
Brow Wiz |
4.5 |
14000 |
4.173684 |
| Benefit Cosmetics |
They re Real Lengthening Volumizing Mascara |
4.0 |
15000 |
3.978947 |
| Anastasia Beverly Hills |
Modern Renaissance Eye Shadow Palette |
5.0 |
8000 |
3.842105 |
| Anastasia Beverly Hills |
DIPBROW Pomade |
4.5 |
10000 |
3.752632 |
| beautyblender |
the original beautyblender |
4.5 |
8000 |
3.542105 |
| bareMinerals |
Matte Loose Powder Mineral Foundation Broad Spectrum
SPF 15 |
4.5 |
7000 |
3.436842 |
| bareMinerals |
Mineral Veil Setting Powder |
4.5 |
5000 |
3.226316 |
| BECCA |
Shimmering Skin Perfector Pressed Highlighter |
4.5 |
5000 |
3.226316 |
| Benefit Cosmetics |
Boi ing Cakeless Concealer |
4.5 |
5000 |
3.226316 |
| Benefit Cosmetics |
The POREfessional Face Primer |
4.0 |
7000 |
3.136842 |
| Benefit Cosmetics |
The POREfessional Face Primer Mini |
4.0 |
7000 |
3.136842 |
| Anastasia Beverly Hills |
Clear Brow Gel |
4.5 |
4000 |
3.121053 |
| Anastasia Beverly Hills |
Brow Powder Duo |
4.5 |
4000 |
3.121053 |
| bareMinerals |
Broad Spectrum Concealer |
4.5 |
4000 |
3.121053 |
Business Question 2
Which brands have the highest number of products priced above
$50?
Business_Question_2 <- read_csv("Business Question 2.csv") %>%
slice_max(order_by = luxury_products, n = 15)
Business_Question_2_Plot <- ggplot(Business_Question_2, aes(x = reorder(brand, luxury_products), y = luxury_products)) +
geom_col(fill = "darkorange") +
coord_flip() +
labs(
title = "Number of Luxury Products per Brand (Top 15)",
x = "Brand",
y = "Number of Products"
) +
theme_minimal() +
theme(
plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
plot.margin = margin(1, 1, 1, 1, "cm")
)
(Business_Question_2_Plot)

Business_Question_2_Table <- kable(Business_Question_2, caption = "Top 15 Brands with the Highest Number of Luxury Products")
Business_Question_2_Table
Top 15 Brands with the Highest Number of Luxury
Products
| TOM FORD |
135 |
| Dior |
81 |
| Jo Malone London |
63 |
| CHANEL |
50 |
| La Mer |
46 |
| Yves Saint Laurent |
46 |
| Perricone MD |
44 |
| Est e Lauder |
41 |
| Peter Thomas Roth |
41 |
| Atelier Cologne |
39 |
| Dr Barbara Sturm |
39 |
| Fresh |
39 |
| Guerlain |
39 |
| Gucci |
38 |
| Givenchy |
37 |
Business Question 3
List all limited edition products with their prices and ratings
Business_Question_3 <- read_csv("Business Question 3.csv") %>%
slice_max(order_by = price, n = 15)
Business_Question_3_Plot <- ggplot(Business_Question_3, aes(x = price, y = rating)) +
geom_point(color = "purple", size = 4) +
geom_text_repel(
aes(label = product_name),
size = 2.5,
max.overlaps = Inf,
box.padding = 0.5,
point.padding = 0.5
) +
labs(
title = "Limited Edition Products: Price vs Rating (Top 15)",
x = "Price",
y = "Rating"
) +
theme_minimal() +
theme(
plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
plot.margin = margin(1, 1, 1, 1, "cm")
)
(Business_Question_3_Plot)

Business_Question_3_Table <- kable(Business_Question_3, caption = "Top 15 Limited Edition Products with Their Prices and Ratings")
Business_Question_3_Table
Top 15 Limited Edition Products with Their Prices and
Ratings
| SpectraLite Faceware Pro Wrinkle Reducing Duo |
435 |
1.0 |
| Trinity Eye and Lip Enhancer Attachment Bundle |
429 |
3.5 |
| Supersonic Hair Dryer Gift Edition with Red Case |
399 |
4.0 |
| Supersonic Hair Dryer Limited Edition Gift Set |
399 |
3.5 |
| Supersonic Hair Dryer 23 75 Karat Gold |
399 |
4.5 |
| Timeless Beauty Icons Bojagi Set |
324 |
0.0 |
| CAXA |
280 |
0.0 |
| GloPRO Microneedling Facial Regeneration Tool Set |
249 |
4.5 |
| Deep Scarlet Platinum Styler Set |
249 |
5.0 |
| Cobalt Blue Platinum Styler Set Upbeat Collection |
249 |
0.0 |
| Glam Glow Liquid Lights Vault |
235 |
4.0 |
| Bella Sofia Look Set |
230 |
4.5 |
| Pitera Hydrating Essence Set |
229 |
5.0 |
| Flight Essentials Kit |
225 |
3.0 |
| Personal Microderm PRO and PMD Clean Holiday Exclusive
Bundle |
219 |
4.5 |
Business Question 4
What is the total number of online only products in each
category?
Business_Question_4 <- read_csv("Business Question 4.csv") %>%
slice_max(order_by = online_only_count, n = 15)
Business_Question_4_Plot <- ggplot(Business_Question_4, aes(x = reorder(category, online_only_count), y = online_only_count)) +
geom_col(fill = "skyblue") +
coord_flip() +
labs(
title = "Online-Only Products per Category (Top 15)",
x = "Category",
y = "Number of Online-Only Products"
) +
theme_minimal() +
theme(
plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
plot.margin = margin(1, 1, 1, 1, "cm")
)
(Business_Question_4_Plot)

Business_Question_4_Table <- kable(Business_Question_4, caption = "Top 15 Categories with the Highest Number of Online-Only Products")
Business_Question_4_Table
Top 15 Categories with the Highest Number of Online-Only
Products
| Value Gift Sets |
172 |
| Face Serums |
119 |
| Perfume |
113 |
| Moisturizers |
88 |
| Candles Home Scents |
77 |
| Shampoo |
61 |
| Hair Styling Products |
59 |
| Body Lotions Body Oils |
50 |
| Perfume Gift Sets |
50 |
| Beauty Supplements |
50 |
| Face Wash Cleansers |
50 |
| Mini Size |
49 |
| Conditioner |
47 |
| Rollerballs Travel Size |
39 |
| Face Masks |
39 |
| Lotions Oils |
39 |
Business Question 6
Find products that outperform their brand’s average in both ratings
and price efficiency (rating-to-price ratio)
Business_Question_6 <- read_csv("Business Question 6.csv")
# Filter top-performing products
top_performers <- Business_Question_6 %>%
filter(rating > brand_avg_rating, rating_per_dollar > brand_avg_rating_per_dollar) %>%
mutate(performance_score = (rating - brand_avg_rating) +
(rating_per_dollar - brand_avg_rating_per_dollar)) %>%
arrange(desc(performance_score)) %>%
slice(1:10) # Reduced to top 10 for better readability
Business_Question_6_Plot <- ggplot(top_performers, aes(x = reorder(product_name, performance_score), y = performance_score)) +
geom_col(aes(fill = category)) +
geom_text_repel(
aes(label = brand),
size = 2.5,
box.padding = 0.5, # Space between label and bar
point.padding = 0.5, # Space around the label
nudge_x = 0.1, # Slight horizontal shift to avoid overlap
direction = "y", # Keep labels aligned vertically
hjust = 1, # Adjust horizontal justification
segment.color = "grey50" # Color of the connecting line
) +
coord_flip() +
labs(
title = "Top Products Outperforming Brand Averages",
subtitle = "Products exceeding rating and price efficiency metrics",
x = "Product Name",
y = "Performance Score",
fill = "Category"
) +
theme_minimal() +
theme(
plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
plot.subtitle = element_text(hjust = 0.5, size = 10),
plot.margin = margin(1, 2, 1, 1, "cm"), # Increase right margin for label space
legend.position = "bottom", # Move legend to the bottom
axis.text.y = element_text(size = 8) # Adjust the font size for the y-axis labels (product names)
) +
scale_x_discrete(expand = c(0.2, 0)) # Adjusts spacing on x-axis to prevent cutting off
(Business_Question_6_Plot)

# Create and display the table with smaller dimensions using kableExtra
Business_Question_6_Table <- kable(top_performers, caption = "Top Performing Products Based on Rating and Price Efficiency") %>%
kable_styling(
font_size = 7, # Smaller font size
position = "center", # Center the table
bootstrap_options = c("striped", "hover") # Add striped rows and hover effect
) %>%
column_spec(1, width = "3cm") %>% # Adjust the width of the first column (product names)
column_spec(2, width = "3cm") %>% # Adjust the width of the second column (performance score)
column_spec(3, width = "3cm") %>% # Adjust the width of the third column (category)
row_spec(0, bold = TRUE) # Make the header bold
# Display the table
Business_Question_6_Table
Top Performing Products Based on Rating and Price Efficiency
|
product_name
|
brand
|
category
|
rating
|
brand_avg_rating
|
rating_diff_percentage
|
price
|
rating_per_dollar
|
brand_avg_rating_per_dollar
|
category_rating_percentile
|
category_efficiency_percentile
|
number_of_reviews
|
performance_score
|
|
Snag Free Hair Elastics
|
SEPHORA COLLECTION
|
Hair Accessories
|
4.5
|
3.857692
|
16.65
|
4
|
112.50
|
33.69
|
57.14
|
100.00
|
1000
|
79.45231
|
|
Soft Touch Cotton Pads
|
SEPHORA COLLECTION
|
Makeup Removers
|
4.5
|
3.857692
|
16.65
|
4
|
112.50
|
33.69
|
53.85
|
94.87
|
328
|
79.45231
|
|
Mini Dramatically Different Moisturizing Lotion
|
CLINIQUE
|
Skincare
|
4.0
|
3.921610
|
2.00
|
5
|
80.00
|
16.01
|
18.00
|
98.00
|
1000
|
64.06839
|
|
Pencil Sharpener
|
NARS
|
Mirrors Sharpeners
|
4.5
|
4.114035
|
9.38
|
6
|
75.00
|
15.22
|
50.00
|
87.50
|
836
|
60.16596
|
|
Foot Mask
|
SEPHORA COLLECTION
|
Hand Cream Foot Cream
|
4.5
|
3.857692
|
16.65
|
5
|
90.00
|
33.69
|
52.00
|
100.00
|
956
|
56.95231
|
|
Quench Intense Hydration Mask
|
Saturday Skin
|
Face Masks
|
4.5
|
4.466667
|
0.75
|
6
|
75.00
|
25.09
|
40.34
|
93.70
|
773
|
49.94333
|
|
Eye Mask
|
SEPHORA COLLECTION
|
Eye Masks
|
4.0
|
3.857692
|
3.69
|
5
|
80.00
|
33.69
|
25.00
|
91.67
|
827
|
46.45231
|
|
Eye Mask Grape Smoothing
|
SEPHORA COLLECTION
|
Eye Masks
|
4.0
|
3.857692
|
3.69
|
5
|
80.00
|
33.69
|
25.00
|
91.67
|
827
|
46.45231
|
|
Hand Mask
|
SEPHORA COLLECTION
|
Hand Cream Foot Cream
|
4.0
|
3.857692
|
3.69
|
5
|
80.00
|
33.69
|
20.00
|
92.00
|
394
|
46.45231
|
|
The Microdelivery Exfoliating Facial Wash
|
philosophy
|
Face Wash Cleansers
|
4.5
|
3.982143
|
13.00
|
8
|
56.25
|
12.45
|
38.71
|
99.60
|
3000
|
44.31786
|
Business Question 7
Which products contribute the most to their brand’s total revenue in
each category?
Business_Question_7 <- read_csv("Business Question 7.csv") %>%
slice_max(order_by = product_revenue, n = 20)
# Horizontal bar chart for better readability
Business_Question_7_Plot <- ggplot(Business_Question_7, aes(x = reorder(brand, -contribution_percentage), y = contribution_percentage, fill = product_name)) +
geom_bar(stat = "identity") +
labs(
title = "Contribution Percentage to Revenue by Products (Stacked Bar Chart)",
x = "Brand",
y = "Contribution Percentage (%)",
fill = "Product"
) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "bottom", # Position the legend at the bottom
legend.text = element_text(size = 5.5), # Smaller legend text
legend.title = element_text(size = 8), # Smaller legend title
legend.key.size = unit(0.25, "cm") # Adjust the size of the legend keys
)
(Business_Question_7_Plot)

Business_Question_7_Table <- kable(Business_Question_7, caption = "Top 20 Products Contributing to Brand Revenue")
Business_Question_7_Table
Top 20 Products Contributing to Brand Revenue
| Flowerbomb |
Viktor Rolf |
Perfume |
660000 |
59.01 |
| Black Opium Eau de Parfum |
Yves Saint Laurent |
Perfume |
620000 |
36.64 |
| Original Loose Powder Mineral Foundation Broad Spectrum
SPF 15 |
bareMinerals |
Foundation |
608000 |
31.34 |
| Pro Filt r Soft Matte Longwear Foundation |
FENTY BEAUTY by Rihanna |
Foundation |
525000 |
31.20 |
| Lock It Foundation |
KVD Vegan Beauty |
Foundation |
518000 |
21.27 |
| Blush |
NARS |
Blush |
510000 |
22.03 |
| Alpha Beta Extra Strength Daily Peel |
Dr Dennis Gross Skincare |
Facial Peels |
440000 |
41.23 |
| Ultra HD Invisible Cover Foundation |
MAKE UP FOR EVER |
Foundation |
387000 |
25.09 |
| They re Real Lengthening Volumizing Mascara |
Benefit Cosmetics |
Mascara |
375000 |
17.85 |
| Better Than Sex Mascara |
Too Faced |
Mascara |
375000 |
15.64 |
| Truth Serum |
OLEHENRIKSEN |
Face Serums |
370000 |
25.56 |
| Good Genes All In One Lactic Acid Treatment |
SUNDAY RILEY |
Face Serums |
366000 |
31.45 |
| Tattoo Eyeliner |
KVD Vegan Beauty |
Eyeliner |
357000 |
14.66 |
| Born This Way Foundation |
Too Faced |
Foundation |
351000 |
14.64 |
| 100 percent Pure Argan Oil |
Josie Maran |
Face Oils |
343000 |
35.31 |
| Modern Renaissance Eye Shadow Palette |
Anastasia Beverly Hills |
Eye Palettes |
336000 |
16.27 |
| All Nighter Long Lasting Makeup Setting Spray
Jumbo |
Urban Decay |
Setting Spray Powder |
336000 |
13.02 |
| Radiant Creamy Concealer |
NARS |
Concealer |
330000 |
14.25 |
| 24 7 Glide On Eye Pencil |
Urban Decay |
Eyeliner |
330000 |
12.78 |
| Veil Mineral Primer |
Hourglass |
Face Primer |
324000 |
16.32 |
| Naked3 Palette |
Urban Decay |
Eye Palettes |
324000 |
12.55 |
Business Question 8
Which brands have the highest number of high-rated products (above
4.5) in at least 3 different categories?
Business_Question_8 <- read_csv("Business Question 8.csv") %>%
slice_max(order_by = categories_with_high_rated, n = 15)
Business_Question_8_Plot <- ggplot(Business_Question_8, aes(x = reorder(brand, categories_with_high_rated), y = categories_with_high_rated)) +
geom_col(fill = "darkgreen") +
coord_flip() +
labs(
title = "Brands with High-Rated Products in Multiple Categories (Top 15)",
x = "Brand",
y = "Number of Categories"
) +
theme_minimal() +
theme(
plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
plot.margin = margin(1, 1, 1, 1, "cm")
)
(Business_Question_8_Plot)

Business_Question_8_Table <- kable(Business_Question_8, caption = "Top 15 Brands with High-Rated Products in Multiple Categories")
Business_Question_8_Table
Top 15 Brands with High-Rated Products in Multiple
Categories
| SEPHORA COLLECTION |
26 |
| Dior |
11 |
| CLINIQUE |
10 |
| Kiehl s Since 1851 |
9 |
| TOM FORD |
9 |
| Lanc me |
8 |
| Jack Black |
8 |
| tarte |
7 |
| Shiseido |
7 |
| Proactiv |
7 |
| Jo Malone London |
6 |
| Atelier Cologne |
6 |
| Better Not Younger |
6 |
| innisfree |
6 |
| Yves Saint Laurent |
6 |
| CHANEL |
6 |